package com.chb.weibo1;

import java.io.IOException;
import java.io.StringReader;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
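
// The imports below are needed only by the FirstJobDriver sketch appended at
// the bottom of this file (an illustrative addition, not original code).
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;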

/**
 * First mapper of the weibo word-frequency job. Each input record is one
 * post: the key is the weibo id and the value is its content (as produced
 * by a key/value text input format). For every segmented token the mapper
 * emits ("word_id", 1), plus a single ("count", 1) marker per post so the
 * total number of posts can be tallied downstream.
 */
public class FirstMapper extends Mapper<Text, Text, Text, IntWritable> {
	// Reusable writables: Hadoop serializes on write(), so reusing these
	// avoids allocating new objects for every emitted pair.
	private final Text outKey = new Text();
	private final IntWritable one = new IntWritable(1);
	@Override
	protected void map(Text key, Text value, Context context)
			throws IOException, InterruptedException {
		// Each input record is one post: key = weibo id, value = content.
		String id = key.toString();
		String content = value.toString();
		
		// IK Analyzer word segmenter; true enables smart (coarse-grained) mode
		IKSegmenter ikSegmenter = new IKSegmenter(new StringReader(content), true);
		Lexeme word = null;
		while ((word = ikSegmenter.next()) != null) {
			// Emit (word_id, 1); the reducer sums these into per-post
			// term frequencies.
			outKey.set(word.getLexemeText() + "_" + id);
			context.write(outKey, one);
		}
		// Also emit one ("count", 1) marker per post so the total number
		// of posts can be counted downstream.
		outKey.set("count");
		context.write(outKey, one);
	}
}
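
/*
 * Minimal driver sketch (an illustrative addition, not part of the original
 * source). It shows one way to wire FirstMapper into a job, assuming the
 * input files hold tab-separated "id<TAB>content" lines read through
 * KeyValueTextInputFormat, which is what gives the mapper its (Text, Text)
 * input types. The class name FirstJobDriver is hypothetical, and Hadoop's
 * stock IntSumReducer stands in for whatever reducer the original project
 * uses to sum the emitted 1s.
 */
class FirstJobDriver {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "weibo-first-wordcount");
		job.setJarByClass(FirstJobDriver.class);

		job.setMapperClass(FirstMapper.class);
		// IntSumReducer sums the IntWritable 1s for each distinct key.
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// Split each input line at the first tab into (id, content).
		job.setInputFormatClass(KeyValueTextInputFormat.class);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}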
